Background

This data set has been designed by Josh Mannix as a part of JOUR 301.

The data show the relationship between NFL players posting record-setting yardage and the number of opportunities they receive.

Some of the more interesting takeaways…

While rushing yards and rushing attempts had an obvious relationship for running backs, that relationship did not translate to receivers or quarterbacks. Many of the receivers put up big numbers with far fewer than 13 receptions. On the other end, there were many QBs with far more attempts, while very few ever eclipsed the 500-yard mark.

The other trend that I noticed was the rise in passing numbers over the past two decades. In the early 2000s it was relatively rare to see a passer exceed 57 attempts, but recently it has become far more common.

Data Retrieved From https://github.com/rfordatascience/tidytuesday/blob/master/data/2018/2018-08-28/nfl_2010-2017.csv

Raw Data https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-08-28/nfl_2010-2017.csv

Data

# Attach the tidyverse and the TidyTuesday helper package.
library(tidyverse)
library(tidytuesdayR)

# Load the TidyTuesday release dated 2018-08-28 and extract its
# `nfl_2010-2017` table as the working data frame.
NFL_Data <- tidytuesdayR::tt_load("2018-08-28")
nfl_stats <- NFL_Data$`nfl_2010-2017`

# Show every column with its type and first few values.
nfl_stats |>
  glimpse()
Rows: 81,525
Columns: 23
$ ...1         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name         <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team         <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year    <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att     <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds     <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg     <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds     <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec          <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds      <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg      <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds      <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position     <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 rushing yards, reduced to the player name,
# the rushing yardage, and the game_* columns.
hi_rsh <- nfl_stats |>
  filter(rush_yds > 200) |>
  select(name, rush_yds, starts_with("game"))

# Number of such rows per season.
count(hi_rsh, game_year)
# A tibble: 17 × 2
   game_year     n
       <dbl> <int>
 1      2000    10
 2      2001     2
 3      2002     4
 4      2003     3
 5      2004     2
 6      2005     3
 7      2006     4
 8      2007     3
 9      2008     2
10      2009     5
# … with 7 more rows
# Show the column overview of nfl_stats again before the passing summary.
nfl_stats |>
  glimpse()
Rows: 81,525
Columns: 23
$ ...1         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name         <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team         <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year    <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att     <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds     <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg     <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds     <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec          <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds      <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg      <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds      <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position     <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 500 passing yards, reduced to the player name,
# the passing yardage, and the game_* columns.
hi_pass <- nfl_stats |>
  filter(pass_yds > 500) |>
  select(name, pass_yds, starts_with("game"))

# Number of such rows per season.
count(hi_pass, game_year)
# A tibble: 10 × 2
   game_year     n
       <dbl> <int>
 1      2000     1
 2      2006     1
 3      2009     1
 4      2011     1
 5      2012     2
 6      2013     1
 7      2014     1
 8      2015     2
 9      2016     2
10      2017     1
# Show the column overview of nfl_stats again before the receiving summary.
nfl_stats |>
  glimpse()
Rows: 81,525
Columns: 23
$ ...1         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name         <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team         <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year    <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att     <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds     <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg     <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds     <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec          <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds      <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg      <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds      <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position     <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 receiving yards, reduced to the player name,
# the receiving yardage, and the game_* columns.
# Fix: the selected column was pass_yds (copied from the passing summary);
# a receiving summary needs rec_yds.
hi_rec <- nfl_stats |>
  filter(rec_yds > 200) |>
  select(name, rec_yds, starts_with("game"))

# Number of such rows per season.
# Fix: this previously counted hi_pass, so the receiving table was never
# shown. NOTE(review): the table printed after this chunk in the rendered
# document came from the old code and must be regenerated by re-knitting.
count(hi_rec, game_year)
# A tibble: 10 × 2
   game_year     n
       <dbl> <int>
 1      2000     1
 2      2006     1
 3      2009     1
 4      2011     1
 5      2012     2
 6      2013     1
 7      2014     1
 8      2015     2
 9      2016     2
10      2017     1

Information about the dataset…

Rushing Attempts


# Title, axis, and legend text for the rushing-attempts bubble chart.
labs_grp_bubble <- labs(
  title = "Rushing Attempts Leaders",
  x = "Year",
  y = "Name",
  size = "Rushing Yards"
)

# One bubble per row with more than 33 rushing attempts, placed by season
# (x) and player (y). Bubble size is mapped to rush_yds and the fill to the
# player name; the black outline is a constant set outside aes().
ggp2_grp_bubble <- nfl_stats |>
  filter(rush_att > 33) |>
  ggplot(mapping = aes(x = game_year, y = name)) +
  geom_point(
    mapping = aes(size = rush_yds, fill = name),
    shape = 21,
    color = "black",
    alpha = 2 / 3,
    show.legend = FALSE
  ) +
  scale_size(name = "rushing yards", range = c(1, 7)) +
  ggthemes::theme_few(base_size = 11)

# Draw the chart with its labels applied.
ggp2_grp_bubble + labs_grp_bubble

The size of each bubble reflects the player's rushing yards; only performances with more than 33 rushing attempts are shown.

Rushing Yards


# Title and axis text for the rushing-yards scatter plot.
# Fix: the y label was "name)" with a stray closing parenthesis.
labs_scatter <- labs(
  title = "Highest Rushers",
  x = "game_year",
  y = "name"
)

# One point per row with more than 200 rushing yards, placed by season (x)
# and player (y).
ggp2_scatter <- nfl_stats |>
  filter(rush_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

# Draw the chart with its labels applied.
ggp2_scatter + labs_scatter

Receptions


# Title, axis, and legend text for the receptions bubble chart.
# Fix: the size legend said "Rushing Yards", copied from the rushing chart.
labs_grp_bubble <- labs(
  title = "Receptions >13",
  x = "Year",
  y = "Name",
  size = "Receiving Yards"
)

# One bubble per row with more than 13 receptions, placed by season (x) and
# player (y). The fill is mapped to the player name; the black outline is a
# constant set outside aes().
# Fix: bubble size was mapped to rush_yds (copied from the rushing chart),
# which is unrelated to receptions. It is now mapped to rec_yds, mirroring
# the rushing chart (filter on volume, size by yards).
ggp2_grp_bubble <- nfl_stats |>
  filter(rec > 13) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    aes(size = rec_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7), name = "receiving yards") +
  ggthemes::theme_few()

# Draw the chart with its labels applied.
ggp2_grp_bubble + labs_grp_bubble

The size of the bubble is relative to the number of receptions above 13.

Receiving Yards


# Title and axis text for the receiving-yards scatter plot.
# Fix: the title was misspelled ("Highest Recivers") and the y label was
# "name)" with a stray closing parenthesis.
labs_scatter <- labs(
  title = "Highest Receivers",
  x = "game_year",
  y = "name"
)

# One point per row with more than 200 receiving yards, placed by season (x)
# and player (y).
ggp2_scatter <- nfl_stats |>
  filter(rec_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

# Draw the chart with its labels applied.
ggp2_scatter + labs_scatter

Pass Attempts


# Title, axis, and legend text for the pass-attempts bubble chart.
# Fix: the size legend said "Rushing Yards", copied from the rushing chart.
labs_grp_bubble <- labs(
  title = "Pass Attempt Leaders",
  x = "Year",
  y = "Name",
  size = "Passing Yards"
)

# One bubble per row with more than 57 pass attempts, placed by season (x)
# and player (y). The fill is mapped to the player name; the black outline
# is a constant set outside aes().
# Fix: bubble size was mapped to rush_yds (copied from the rushing chart),
# which is unrelated to passing volume. It is now mapped to pass_yds,
# mirroring the rushing chart (filter on volume, size by yards).
ggp2_grp_bubble <- nfl_stats |>
  filter(pass_att > 57) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    aes(size = pass_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7), name = "passing yards") +
  ggthemes::theme_few()

# Draw the chart with its labels applied.
ggp2_grp_bubble + labs_grp_bubble

The size of the bubble is relative to the number of pass attempts over 57.

Passing Yards


# Title and axis text for the passing-yards scatter plot.
# Fix: the y label was "name)" with a stray closing parenthesis.
labs_scatter <- labs(
  title = "Highest Passers",
  x = "game_year",
  y = "name"
)

# One point per row with more than 500 passing yards, placed by season (x)
# and player (y).
ggp2_scatter <- nfl_stats |>
  filter(pass_yds > 500) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

# Draw the chart with its labels applied.
ggp2_scatter + labs_scatter